Working idea: This network will then be used to identify hub genes for each of the early, middle and late interactions.
Load Phytophthora cinnamomi gtf file, clean up to get Gene IDs
Combine alternatively spliced transcripts (seen in transcript_id as -RA and RB)
pc.gtf <- rtracklayer::import("https://ftp.ncbi.nlm.nih.gov/genomes/genbank/protozoa/Phytophthora_cinnamomi/latest_assembly_versions/GCA_018691715.1_ASM1869171v1/GCA_018691715.1_ASM1869171v1_genomic.gtf.gz")
pc.df <- as.data.frame((pc.gtf))
pc.genes <- filter(pc.df, type == "start_codon")
pc.geneids <- select(pc.genes, gene_id, product, protein_id)
pc.annotations <- pc.geneids %>%
distinct(gene_id, .keep_all = T)
nrow(pc.annotations)
## [1] 19969
pcannot <- as.matrix(pc.annotations)
#write.table(pcannot, file = "pcgeneids.tsv",quote=FALSE,sep="\t")
Load GO terms blasted from Omicsbox
goterms <- read.delim('pc.goterms.txt', header = TRUE) %>%
select(c(SeqName, GO.IDs, GO.Names, Description))
head(goterms)
## SeqName GO.IDs
## 1 KAG6572551.1 F:GO:0005515
## 2 KAG6572580.1 F:GO:0005515
## 3 KAG6572643.1 P:GO:0015074; F:GO:0003676
## 4 KAG6572667.1 F:GO:0005515
## 5 KAG6572730.1 F:GO:0005515
## 6 KAG6572776.1 F:GO:0005515
## GO.Names
## 1 F:protein binding
## 2 F:protein binding
## 3 P:DNA integration; F:nucleic acid binding
## 4 F:protein binding
## 5 F:protein binding
## 6 F:protein binding
## Description
## 1 WD repeat-containing protein mio
## 2 CCT motif-containing protein
## 3 reverse transcriptase
## 4 hypothetical protein IUM83_19800
## 5 hypothetical protein IUM83_18690
## 6 putative PDZ-domain-containing protein
colnames(goterms) <- c("protein_id", "GO_terms", "GO_descripton", "description")
goterms.df <- dplyr::right_join(goterms, pc.annotations, by = 'protein_id')
nrow(goterms.df)
## [1] 19969
Make GO term list for enrichment analysis
splitgt.df <- as.tibble(goterms.df) %>%
separate_longer_delim(c(GO_terms, GO_descripton), delim = ";")
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
splitgt.df <- as.data.frame(splitgt.df)
#strip leading spaces
splitgt.df$GO_terms <- gsub(" ","",splitgt.df$GO_terms)
splitgt.df$GO_descripton <- gsub("^ ","",splitgt.df$GO_descripton)
head(splitgt.df, 10)
## protein_id GO_terms GO_descripton
## 1 KAG6572551.1 F:GO:0005515 F:protein binding
## 2 KAG6572580.1 F:GO:0005515 F:protein binding
## 3 KAG6572643.1 P:GO:0015074 P:DNA integration
## 4 KAG6572643.1 F:GO:0003676 F:nucleic acid binding
## 5 KAG6572667.1 F:GO:0005515 F:protein binding
## 6 KAG6572730.1 F:GO:0005515 F:protein binding
## 7 KAG6572776.1 F:GO:0005515 F:protein binding
## 8 KAG6572794.1 F:GO:0005515 F:protein binding
## 9 KAG6572837.1 F:GO:0005515 F:protein binding
## 10 KAG6572861.1 F:GO:0005515 F:protein binding
## description gene_id
## 1 WD repeat-containing protein mio IUM83_18981
## 2 CCT motif-containing protein IUM83_17625
## 3 reverse transcriptase IUM83_17651
## 4 reverse transcriptase IUM83_17651
## 5 hypothetical protein IUM83_19800 IUM83_19800
## 6 hypothetical protein IUM83_18690 IUM83_18690
## 7 putative PDZ-domain-containing protein IUM83_15555
## 8 Ankyrin repeats domain-containing protein IUM83_15538
## 9 putative autophagy-related protein IUM83_15545
## 10 WW domain IUM83_15509
## product
## 1 WD repeat-containing protein mio
## 2 CCT domain
## 3 reverse transcriptase
## 4 reverse transcriptase
## 5 hypothetical protein
## 6 hypothetical protein
## 7 putative PDZ-domain-containing protein
## 8 hypothetical protein
## 9 putative autophagy-related protein
## 10 WW domain
gu <- unique(splitgt.df$GO_terms)
head(gu,20)
## [1] "F:GO:0005515" "P:GO:0015074" "F:GO:0003676" "F:GO:0008270" "P:GO:0006355"
## [6] "F:GO:0003700" "F:GO:0005509" "P:GO:0036297" "C:GO:0043240" "P:GO:0006629"
## [11] "F:GO:0020037" "P:GO:0005975" "P:GO:0006508" "F:GO:0030248" "C:GO:0005576"
## [16] "P:GO:0006801" "F:GO:0046872" "F:GO:0005525" "P:GO:0007076" "C:GO:0016020"
gul <- lapply(1:length(gu), function(i){
mygo <- gu[i]
unique(splitgt.df[splitgt.df$GO_terms == mygo, "gene_id"])
})
names(gul) <- lapply(1:length(gu), function(i){
mygo <- gu[i]
desc <- head(splitgt.df[splitgt.df$GO_terms == mygo, "GO_descripton"],1)
id_desc <- paste(mygo,desc)
})
head(names(gul))
## [1] "F:GO:0005515 F:protein binding"
## [2] "P:GO:0015074 P:DNA integration"
## [3] "F:GO:0003676 F:nucleic acid binding"
## [4] "F:GO:0008270 F:zinc ion binding"
## [5] "P:GO:0006355 P:regulation of DNA-templated transcription"
## [6] "F:GO:0003700 F:DNA-binding transcription factor activity"
# Find the element within the list named "X"
matching_element <- gul$X
# Display the matching element
matching_element
## NULL
Assemble count matrix and coldata file
## BS01 BS02 BS03 BS04 BS05 BS06 BS07 BS08 BS09 BS10 BS11 BS12
## ENSRNA049758817 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 0 0 0 0 0 0 0 1
## ENSRNA049758848 0 0 0 1 0 0 0 0 0 0 0 0
## ENSRNA049758851 3 1 1 1 2 1 0 0 2 2 2 3
## ENSRNA049758853 0 0 0 0 0 0 0 0 0 0 0 0
## BS13 BS14 BS15 BS16 BS17 BS18 BS19 BS20 BS21 BS22 BS23 BS24
## ENSRNA049758817 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758844 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758846 0 0 0 0 1 0 0 0 0 0 0 0
## ENSRNA049758848 0 0 0 0 0 0 0 0 0 0 0 0
## ENSRNA049758851 0 0 0 0 0 2 0 0 1 0 1 0
## ENSRNA049758853 0 0 0 0 0 0 0 0 0 0 0 0
Import files into r
coldata <- read.table('sample_info.tsv')
Pcgenenames <- read.table('pcgeneids.tsv', row.names = 1, quote = "", sep='\t', fill = TRUE, header = TRUE)
IUM83Tanjilcountmatrix <- read.table('countmatrix.tsv')
Clean countmatrix for P. cinnamomi analysis
collabels <- colnames(IUM83Tanjilcountmatrix) <- (coldata$Sample)
colnames(IUM83Tanjilcountmatrix) <- collabels
IUM83Tanjilcountmatrix <- tibble::rownames_to_column(IUM83Tanjilcountmatrix, "gene_id")
IUM83CountMatrix <- IUM83Tanjilcountmatrix %>%
filter(str_detect(gene_id, "IUM83"))
rownames(IUM83CountMatrix) <- IUM83CountMatrix$gene_id
IUM83CountMatrix <- IUM83CountMatrix [ ,-1]
IUM83CountMatrix <- as.data.frame(IUM83CountMatrix)
IUM83CountMatrix <- IUM83CountMatrix[ ,-(1:12)]
head(IUM83CountMatrix)
## PcHyp_1 PcHyp_2 PcHyp_3 LaPc18_1 LaPc18_2 LaPc18_3 LaPc30_1
## IUM83_00001 0 0 0 0 0 0 0
## IUM83_00002 0 0 0 0 0 0 0
## IUM83_00003 0 0 0 0 0 0 0
## IUM83_00004 0 0 0 0 0 0 0
## IUM83_00005 0 0 0 0 0 0 0
## IUM83_00006 1 0 0 0 0 0 0
## LaPc30_2 LaPc30_3 LaPc48_1 LaPc48_2 LaPc48_3
## IUM83_00001 0 0 0 0 0
## IUM83_00002 0 0 0 0 0
## IUM83_00003 0 0 0 0 0
## IUM83_00004 0 0 0 0 0
## IUM83_00005 0 0 0 0 0
## IUM83_00006 0 0 0 1 2
#write.table(IUM83CountMatrix, file = "Pc_countmatrix.tsv",quote=FALSE,sep="\t")
Clean coldata for Deseq2 normalisation
colData <- coldata %>%
filter(str_detect(Sample, "Pc"))
rownames(colData) <- colData$Sample
colData <- colData [ ,-1]
Detect and remove outlier genes
#Call a function from WGCNA package that detects outliers
#Rows should be the samples and the columns genes
gsg <- goodSamplesGenes(t(IUM83CountMatrix))
## Flagging genes and samples with too many missing values...
## ..step 1
## ..step 2
summary(gsg)
## Length Class Mode
## goodGenes 19981 -none- logical
## goodSamples 12 -none- logical
## allOK 1 -none- logical
#If false, then there are outliers
gsg$allOK #False
## [1] FALSE
table(gsg$goodGenes) #3012to be excluded
##
## FALSE TRUE
## 3012 16969
table(gsg$goodSamples) #All 12 samples passed
##
## TRUE
## 12
# remove genes that are detectd as outliers
data <- IUM83CountMatrix[gsg$goodGenes == TRUE,]
Detect outlier samples
# detect outlier samples - hierarchical clustering
htree <- hclust(dist(t(data)), method = "average")
groups <- cutree(htree, k=2) # cut tree into clusters
plot(htree, labels(groups))
# draw dendogram with red borders around the clusters
rect.hclust(htree, k=2, border="red")
# Normalisation using DESeq2 package
The WGCNA library requires normalisation using the vst (variance-stabilising transform) function from the DESeq2 package
# exclude outlier samples from the column data to match the new data
#colData <- colData %>%
# filter(!row.names(.) %in% outliers)
# making sure the rownames and column names identical for the two data sets
#all(rownames(colData) == colnames(clusters))
# create DESeq Data Set
dds <- DESeqDataSetFromMatrix(countData = data,
colData = colData,
design = ~ 1) # no model, for normalisation only
# remove all genes with counts < 10 in more than 90% of samples (12*0.9=10.8 ~ 11) as suggested by WGCNA on RNAseq FAQ (https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html)
# <10 in more than 90% samples
dds90 <- dds[rowSums(counts(dds) >= 10) >= 11,]
nrow(dds90) # 5403 genes
## [1] 5403
# >= 10) >= 11,] 5403 genes <===== #less than 10 counts in %90 of samples
# >= 15) >= 11,] 4420 genes
gene.list <- row.names(dds90)
#lapply(gene.list, write, "all_genes.txt", append=TRUE)
# perform variance stabilization
dds_norm <- vst(dds90)
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
# get normalized counts
norm.counts <- assay(dds_norm) %>%
t()
your_dds <- estimateSizeFactors(dds90)
your_dds <- estimateDispersions(your_dds)
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
# Plot dispersion estimates and fits
plotDispEsts(your_dds, main = "Dispersion Trend with Local and Parametric Fits")
#Save normalised dataset for faster analysis next time
saveRDS(norm.counts, "norm.counts.rds")
# load norm.counts object
#norm.counts <- readRDS("norm.counts")
# Choose a set of soft-thresholding powers to create a scale-free network
power <- c(c(1:10), seq(from = 12, to = 30, by = 2))
# Call the network topology analysis function
sft <- pickSoftThreshold(norm.counts,
powerVector = power,
networkType = "signed",
RsquaredCut = 0.90,
verbose = 5)
## pickSoftThreshold: will use block size 5403.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 5403 of 5403
## Warning: executing %dopar% sequentially: no parallel backend registered
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.0299 7.3100 0.98700 2700 2700.0 2770
## 2 2 0.8450 4.3300 0.80100 1850 1900.0 2120
## 3 3 0.8230 2.0600 0.77600 1430 1500.0 1840
## 4 4 0.8140 1.1900 0.76600 1170 1240.0 1660
## 5 5 0.7740 0.7420 0.72300 985 1050.0 1520
## 6 6 0.6450 0.4540 0.57500 851 901.0 1420
## 7 7 0.4710 0.2520 0.37300 747 783.0 1330
## 8 8 0.1560 0.0909 -0.00455 664 686.0 1260
## 9 9 0.0318 -0.0337 -0.14200 596 605.0 1190
## 10 10 0.4220 -0.1390 0.32500 539 537.0 1140
## 11 12 0.7020 -0.2950 0.67700 450 430.0 1040
## 12 14 0.8330 -0.4070 0.83100 384 352.0 963
## 13 16 0.8950 -0.4930 0.90700 332 290.0 897
## 14 18 0.9060 -0.5680 0.91700 291 242.0 840
## 15 20 0.9010 -0.6250 0.90800 257 204.0 791
## 16 22 0.9120 -0.6780 0.92600 230 174.0 747
## 17 24 0.9380 -0.7200 0.95600 206 149.0 709
## 18 26 0.9370 -0.7550 0.95400 187 128.0 674
## 19 28 0.9340 -0.7900 0.94700 170 111.0 643
## 20 30 0.9340 -0.8210 0.95000 155 96.6 614
sft.data <- sft$fitIndices
a1 <- ggplot(sft.data, aes(Power, SFT.R.sq, label = Power)) +
geom_point() +
geom_text(nudge_y = 0.1) +
geom_hline(yintercept = 0.90, color = 'red') +
labs(x = 'Power',
y = 'Scale free topology model fit, signed R^2',
title = 'Scale independence') +
theme_classic()
a2 <- ggplot(sft.data, aes(Power, mean.k., label = Power)) +
geom_point() +
geom_text(nudge_y = 0.1) +
labs(x = 'Power',
y = 'Mean Connectivity',
title = 'Mean connectivity') +
theme_classic()
grid.arrange(a1, a2, nrow = 2)
soft_power <- sft$powerEstimate #soft_power <- 18
Create network and identify modules
Note: this chunk is set not to run since the blockwise Modules function takes a long time to run. The R object has been saved and is loaded in the next chunk for a faster run time.
# load bwnet object
bwnet <- readRDS("bwnet.rds")
module_eigengenes <- bwnet$MEs %>%
orderMEs(.)
head(module_eigengenes)
## MEred MEgreenyellow MEpurple MElightcyan MEblue
## PcHyp_1 -0.47101618 -0.39629891 -0.3471852 -0.29931605 -0.4939114
## PcHyp_2 -0.46906481 -0.39987987 -0.3501685 -0.30343391 -0.4781448
## PcHyp_3 -0.46011364 -0.42693672 -0.3415601 -0.25881032 -0.4846273
## LaPc18_1 0.03517933 -0.03152391 -0.1698264 0.37517480 0.2020603
## LaPc18_2 0.07080316 -0.14832273 -0.1236591 0.33844708 0.2552885
## LaPc18_3 -0.09362904 -0.06435771 -0.3406270 -0.07896645 0.2596966
## MEturquoise MEgreen MEbrown MEyellow MEpink MEtan
## PcHyp_1 -0.3856656 0.35237712 0.2502894 0.1294777 -0.03821415 -0.08959661
## PcHyp_2 -0.3781293 0.34358147 0.2337171 0.1570362 -0.04659362 -0.08365339
## PcHyp_3 -0.3505762 0.35324187 0.2211913 0.1123597 -0.03424420 -0.11064913
## LaPc18_1 0.3608623 -0.06791258 -0.3339015 -0.4597694 -0.37617450 -0.18872178
## LaPc18_2 0.3837549 -0.07259095 -0.3876025 -0.4509801 -0.42270461 -0.19930034
## LaPc18_3 0.3880565 -0.76988911 -0.6242172 -0.3038920 -0.48482054 -0.74507581
## MEmagenta MEmidnightblue MEsalmon MEblack MEcyan
## PcHyp_1 0.2108928 0.2975619 0.26834081 0.41223138 0.29552268
## PcHyp_2 0.2299597 0.3444562 0.25841789 0.42231263 0.25600278
## PcHyp_3 0.2421529 0.3360986 0.27888460 0.40529854 0.24959661
## LaPc18_1 0.1219476 -0.2169757 -0.69699921 -0.33192073 -0.70713697
## LaPc18_2 0.1772510 -0.1149137 -0.03107396 -0.28538077 -0.39688700
## LaPc18_3 0.5129250 0.5796245 0.42898216 0.06976138 0.07427833
## MEgrey
## PcHyp_1 0.1077323
## PcHyp_2 0.1019971
## PcHyp_3 0.1071106
## LaPc18_1 0.3519742
## LaPc18_2 0.2891394
## LaPc18_3 -0.8189063
# Plot the dendrogram and the module colors before and after merging underneath
plotDendroAndColors(bwnet$dendrograms[[1]], cbind(bwnet$unmergedColors, bwnet$colors),
c("unmerged", "merged"),
dendroLabels = FALSE,
addGuide = TRUE,
hang= 0.03,
guideHang = 0.05)
# grey module = all genes that doesn't fall into other modules
module.total <- table(bwnet$colors)
#create traits file - assign 1 if a sample is a certain stage, else assign 0
Dis_traits <- colData %>%
mutate(Dis.vs.all = ifelse(grepl('Treated', Treatment), 1, 0)) %>%
select(5)
factor_levels <- unique(colData$Stage)
# transform stages into factors and define levels
colData$Stage <- factor(colData$Stage, levels = factor_levels)
traits <- binarizeCategoricalColumns(colData$Stage,
#dropFirstLevelVsAll = FALSE,
includePairwise = FALSE,
includeLevelVsAll = TRUE,
minCount = 1)
traits <- cbind(Dis_traits, traits)
# Define numbers of genes and samples
nSamples <- nrow(norm.counts)
nGenes <- ncol(norm.counts)
# correlation for module eigengenes and traits
module.trait.corr <- cor(module_eigengenes, traits, use = 'p')
module.trait.corr.pvals <- corPvalueStudent(module.trait.corr, nSamples)
# Heat map v2 (from WGCNA tutorial)
textMatrix = paste(signif(module.trait.corr, 2), "\n(",
signif(module.trait.corr.pvals, 1), ")", sep = "")
dim(textMatrix) = dim(module.trait.corr)
par(mar = c(6, 8.5, 3, 3));
labeledHeatmap(Matrix = module.trait.corr,
xLabels = colnames(module.trait.corr),
yLabels = rownames(module.trait.corr),
ySymbols = rownames(module.trait.corr),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
textAdj = c(0.5, 0.5),
setStdMargins = FALSE,
cex.lab.y = 0.6,
cex.text = 0.7,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
# get number of genes for each module
table(bwnet$colors)
##
## black blue brown cyan green greenyellow
## 243 881 524 57 262 95
## grey lightcyan magenta midnightblue pink purple
## 428 40 132 47 201 98
## red salmon tan turquoise yellow
## 249 65 85 1681 315
#Tag genes with module membership and store it in a table
module.gene.mapping <- as.data.frame(bwnet$colors)
Significance in brackets shows how significantly associated the
module (cluster of genes) is the trait of interest
Find modules that have significant association with disease state
Calculate the module membership and the associated p-values The module membership/intramodular connectivity is calculated as the correlation of the eigengene and the gene expression profile. This quantifies the similarity of all genes on the array to every module.
Using the gene significance you can identify genes that have a high significance for trait of interest Using the module membership measures you can identify genes with high module membership in interesting modules.
# Define a gene significance variable for Early
GS.Early <- as.numeric(cor(norm.counts, traits$data.Early.vs.all, use = "p"))
# This translates the numeric values into colors
GS.stage_earlyColor = numbers2colors(GS.Early, signed = T)
blocknumber = 1
moduleLabelsAutomatic = bwnet$colors
# Convert labels to colors for plotting
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)
datColors = data.frame(bwnet$colors, GS.stage_earlyColor)[bwnet$blockGenes[[blocknumber]],
]
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(bwnet$dendrograms[[blocknumber]], colors = datColors, groupLabels = c("Module colors",
"GS.stage.Early"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
datKME <- signedKME(norm.counts, module_eigengenes)
# Measure the correlation of the gene's module membership and the associated p-values
module.membership.measure <- cor(module_eigengenes, norm.counts, use = 'p')
module.membership.measure.pvals <- corPvalueStudent(module.membership.measure, nSamples)
# Calculate the gene significance and associated p-values
early.gene.signf.corr <- cor(norm.counts, traits$data.Early.vs.all, use = 'p')
early.gene.signf.corr.pvals <- corPvalueStudent(early.gene.signf.corr, nSamples)
colorOfColumn = substring(names(datKME), 4)
#selectModules = c("brown", "magenta", "blue", "turquoise", "lightcyan")
selectModules = colorOfColumn[colorOfColumn !="grey"]
#par(mfrow = c(4, length(selectModules)/4))
for (module in selectModules) {
column = match(module, colorOfColumn)
restModule = moduleColorsAutomatic == module
verboseScatterplot(datKME[restModule, column], GS.Early[restModule], xlab = paste("Module Membership",
module, "module"), ylab = "GS.stage_Early", main = paste("kME.", module,
"vs. GS"), col = module)
}
earlygs <- early.gene.signf.corr %>%
as.data.frame() %>%
tibble::rownames_to_column("gene_id")
earlygsid <- dplyr::right_join(pc.annotations, earlygs, by = 'gene_id')
datKme.id <- as.data.frame(datKME) %>%
rownames_to_column("gene_id")
earlyeiginengene <- dplyr::right_join(earlygsid, datKme.id, by = 'gene_id')
# Define a gene significance variable for Mid
GS.mid <- as.numeric(cor(norm.counts, traits$data.Middle.vs.all, use = "p"))
# This translates the numeric values into colors
GS.stage_midColor = numbers2colors(GS.mid, signed = T)
blocknumber = 1
moduleLabelsAutomatic = bwnet$colors
# Convert labels to colors for plotting
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)
datColors = data.frame(bwnet$colors, GS.stage_midColor)[bwnet$blockGenes[[blocknumber]],
]
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(bwnet$dendrograms[[blocknumber]], colors = datColors, groupLabels = c("Module colors",
"GS.mid"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
datKME <- signedKME(norm.counts, module_eigengenes)
# Measure the correlation of the gene's module membership and the associated p-values
module.membership.measure <- cor(module_eigengenes, norm.counts, use = 'p')
module.membership.measure.pvals <- corPvalueStudent(module.membership.measure, nSamples)
# Calculate the gene significance and associated p-values
mid.gene.signf.corr <- cor(norm.counts, traits$data.Middle.vs.all, use = 'p')
mid.gene.signf.corr.pvals <- corPvalueStudent(mid.gene.signf.corr, nSamples)
colorOfColumn = substring(names(datKME), 4)
#selectModules = c("red", "tan", "salmon","green", "magenta")
selectModules = colorOfColumn[colorOfColumn !="grey"]
#par(mfrow = c(3, length(selectModules)/3))
for (module in selectModules) {
column = match(module, colorOfColumn)
restModule = moduleColorsAutomatic == module
verboseScatterplot(datKME[restModule, column], GS.mid[restModule], xlab = paste("Module Membership",
module, "module"), ylab = "GS.stage_mid", main = paste("kME.", module,
"vs. GS"), col = module)
}
midgs <- mid.gene.signf.corr %>%
as.data.frame() %>%
tibble::rownames_to_column("gene_id")
midgsid <- dplyr::right_join(Pcgenenames, midgs, by = 'gene_id')
datKme.id <- as.data.frame(datKME) %>%
rownames_to_column("gene_id")
mideiginengene <- dplyr::right_join(midgsid, datKme.id, by = 'gene_id')
# Define a gene significance variable for Late interaction
GS.late <- as.numeric(cor(norm.counts, traits$data.Late.vs.all, use = "p"))
# This translates the numeric values into colors
GS.stage_lateColor = numbers2colors(GS.late, signed = T)
blocknumber = 1
moduleLabelsAutomatic = bwnet$colors
# Convert labels to colors for plotting
moduleColorsAutomatic = labels2colors(moduleLabelsAutomatic)
datColors = data.frame(bwnet$colors, GS.stage_lateColor)[bwnet$blockGenes[[blocknumber]],
]
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(bwnet$dendrograms[[blocknumber]], colors = datColors, groupLabels = c("Module colors",
"GS.stage.Late"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
datKME <- signedKME(norm.counts, module_eigengenes)
# Measure the correlation of the gene's module membership and the associated p-values
module.membership.measure <- cor(module_eigengenes, norm.counts, use = 'p')
module.membership.measure.pvals <- corPvalueStudent(module.membership.measure, nSamples)
# Calculate the gene significance and associated p-values
late.gene.signf.corr <- cor(norm.counts, traits$data.Late.vs.all, use = 'p')
late.gene.signf.corr.pvals <- corPvalueStudent(late.gene.signf.corr, nSamples)
colorOfColumn = substring(names(datKME), 4)
#selectModules = c("green", "turquoise", "blue", "grey60", "salmon")
selectModules = colorOfColumn[colorOfColumn !="grey"]
#par(mfrow = c(3, length(selectModules)/3))
for (module in selectModules) {
column = match(module, colorOfColumn)
restModule = moduleColorsAutomatic == module
verboseScatterplot(datKME[restModule, column], GS.late[restModule], xlab = paste("Module Membership",
module, "module"), ylab = "GS.stage_Late", main = paste("kME.", module,
"vs. GS"), col = module)
}
turquoise module (Horvath cares not for the reported cor=). Instead,
Look at the y-axis: genes that have high positive module membership tend
to be highly positively correlated with the late interaction of Pc in
lupin, whereas, genes with negative values in the module they have
negative relations with the late interaction of Pc in lupin. Can select
either; genes with high positive/negative GS or high kMe (hub genes)
lategs <- late.gene.signf.corr.pvals %>%
as.data.frame() %>%
tibble::rownames_to_column("gene_id")
lategsid <- dplyr::right_join(Pcgenenames, lategs, by = 'gene_id')
datKme.id <- as.data.frame(datKME) %>%
rownames_to_column("gene_id")
lateeiginengene <- dplyr::right_join(lategsid, datKme.id, by = 'gene_id')
# chooseTopHubInEachModule returns the gene in each module with the highest connectivity, looking at all genes in the expression file
PcHubgenes <- chooseTopHubInEachModule(norm.counts,
colorh = module.gene.mapping,
omitColor = "grey",
power = 2,
type = "signed")
PcHubgenes
## black blue brown cyan green
## "IUM83_03164" "IUM83_12926" "IUM83_06000" "IUM83_08664" "IUM83_06699"
## greenyellow lightcyan magenta midnightblue pink
## "IUM83_02874" "IUM83_10668" "IUM83_04177" "IUM83_09489" "IUM83_14330"
## purple red salmon tan turquoise
## "IUM83_02522" "IUM83_07133" "IUM83_02405" "IUM83_19502" "IUM83_15977"
## yellow
## "IUM83_18642"
PcHubgenes.df <- PcHubgenes %>%
as.data.frame(row.names = NULL, stringAsFactors = FALSE)
PcHubgenes.df <- PcHubgenes.df %>%
rename("gene_id" = ".")
PcHubgenes.df <- tibble::rownames_to_column(PcHubgenes.df, "module")
PcHubgenes.df <- dplyr::right_join(pc.annotations, PcHubgenes.df, by = 'gene_id')
PcHubgenes.df
## gene_id product
## 1 IUM83_06000 hypothetical protein
## 2 IUM83_15977 heat shock protein 70
## 3 IUM83_07133 aldehyde dehydrogenase, mitochondrial precursor
## 4 IUM83_19502 Annexin protein
## 5 IUM83_09489 RNAse P, Rpr2/Rpp21 subunit
## 6 IUM83_02405 hypothetical protein
## 7 IUM83_02522 putative ribonuclease
## 8 IUM83_08664 snf7-domain-containing protein
## 9 IUM83_04177 PPR repeat protein
## 10 IUM83_03164 putative centrosome-associated protein
## 11 IUM83_10668 putative dihydrolipoamide succinyltransferase
## 12 IUM83_06699 hypothetical protein
## 13 IUM83_12926 hypothetical protein
## 14 IUM83_14330 peroxisomal biogenesis factor 11 domain-containing protein
## 15 IUM83_18642 C2 domain
## 16 IUM83_02874 putative inorganic phosphate transporter
## protein_id module
## 1 KAG6583047.1 brown
## 2 KAG6580057.1 turquoise
## 3 KAG6574467.1 red
## 4 KAG6619070.1 tan
## 5 KAG6617901.1 midnightblue
## 6 KAG6617545.1 salmon
## 7 KAG6617217.1 purple
## 8 KAG6614960.1 cyan
## 9 KAG6614562.1 magenta
## 10 KAG6612551.1 black
## 11 KAG6612350.1 lightcyan
## 12 KAG6610530.1 green
## 13 KAG6608957.1 blue
## 14 KAG6606500.1 pink
## 15 KAG6596141.1 yellow
## 16 KAG6587228.1 greenyellow
# Heatmap of old module eigen-genes and samples
#pdf(file="oldMEs.pdf",heigh=80,width=20)
library("pheatmap")
library(RColorBrewer)
MEs <- bwnet$MEs
rownames(MEs)=names(norm.counts[,9])
#pheatmap(MEs,cluster_col=T,cluster_row=T,show_rownames=F,show_colnames=T,fontsize=6)
col_ann <- colData[,c(1,4)]
rownames(col_ann) <- col_ann$Sample
col_ann <- data.frame(col_ann)
col_ann$Stage <- as.factor(col_ann$Stage)
col_ann <- col_ann[order(col_ann$Stage),]
col_ann$sample_ID <- NULL
head(col_ann, 9)
## Sample Stage
## PcHyp_1 PcHyp_1 Vegetative
## PcHyp_2 PcHyp_2 Vegetative
## PcHyp_3 PcHyp_3 Vegetative
## LaPc18_1 LaPc18_1 Early
## LaPc18_2 LaPc18_2 Early
## LaPc18_3 LaPc18_3 Early
## LaPc30_1 LaPc30_1 Middle
## LaPc30_2 LaPc30_2 Middle
## LaPc30_3 LaPc30_3 Middle
ann_color <- list("col_ann" = c("In Planta" = "purple",
"Early" = "yellow",
"Middle" = "green",
"Late" = "blue"))
data.me <- data.frame(MEs)
data.me <- data.me[order(match(rownames(data.me), rownames(col_ann))),]
#dim(data.me)
#pdf(file="newMEs.pdf",heigh=60,width=20)
rownames(MEs)=names(colData[ ,1])
pheatmap(data.me,cluster_col=T,cluster_row=F,show_rownames=F,
show_colnames=T,fontsize=6,
annotation_row = col_ann, annotation_colors = ann_color)
writeGMT <- function (object, fname ){
if (class(object) != "list") stop("object should be of class 'list'")
if(file.exists(fname)) unlink(fname)
for (iElement in 1:length(object)){
write.table(t(c(make.names(rep(names(object)[iElement],2)),object[[iElement]])),
sep="\t",quote=FALSE,
file=fname,append=TRUE,col.names=FALSE,row.names=FALSE)
}
}
writeGMT(object=gul,fname="goterms.gmt")
genesets <- read.gmt("goterms.gmt")
bg <- gene.list
get_module_genes <- function(eigengene_df) {
module_list <- c("red", "greenyellow", "purple", "lightcyan", "blue", "turquoise",
"green", "brown", "yellow", "pink", "tan", "magenta", "midnightblue",
"salmon", "black", "cyan")
gene_list <- list()
module_genes <- lapply(module_list, function(module) {
eigengene_df$gene_id[eigengene_df$V1 > 0.2 & eigengene_df[[paste0("kME", module)]] > 0.7]
})
names(module_genes) <- module_list
return(module_genes)
}
module_genes_list_early <- get_module_genes(earlyeiginengene)
module_genes_list_mid <- get_module_genes(mideiginengene)
module_genes_list_late <- get_module_genes(lateeiginengene)
get_module_genes_all <- function(eigengene_df, module_list) {
gene_list <- list()
module_genes <- lapply(module_list, function(module) {
eigengene_df$gene_id[eigengene_df$V1 > 0.2 & eigengene_df[[paste0("kME", module)]] > 0.7]
})
names(module_genes) <- module_list
return(module_genes)
}
module_list <- c("red", "greenyellow", "purple", "lightcyan", "blue", "turquoise",
"green", "brown", "yellow", "pink", "tan", "magenta", "midnightblue",
"salmon", "black", "cyan")
module_genes_list_early <- get_module_genes_all(earlyeiginengene, module_list)
results <- lapply(names(module_genes_list_early), function(module.color) {
ora_res <- as.data.frame(enricher(gene = module_genes_list_early[[module.color]],
universe = bg,
maxGSSize = 5000,
TERM2GENE = genesets,
pAdjustMethod = "fdr",
pvalueCutoff = 1,
qvalueCutoff = 1))
ora_res$geneID <- NULL
ora_res <- subset(ora_res, (ora_res$p.adjust < 0.05 & ora_res$Count >= 5))
ora_res_names <- rownames(ora_res)
ora_res$GeneRatio <- as.character(ora_res$GeneRatio)
gr <- as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 2))
br <- as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 2))
ora_res$es <- gr/br
ora_res <- ora_res[order(-ora_res$es), ]
ora_res$Description <- NULL
return(ora_res)
})
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_17731,IUM83_10855,IUM83_12338,IUM83_14578,IUM83_05370,IUM83_00325
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_12734,IUM83_10070,IUM83_04864,IUM83_13520,IUM83_10855,IUM83_03578
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_10158,IUM83_04864,IUM83_10069,IUM83_13005,IUM83_19883,IUM83_02530
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_10855,IUM83_13355,IUM83_19790,IUM83_18049,IUM83_04642,IUM83_09871
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_13533,IUM83_12735,IUM83_10158,IUM83_18607,IUM83_12508,IUM83_19677
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_14369,IUM83_09310,IUM83_04863,IUM83_13305,IUM83_00832,IUM83_12734
## --> return NULL...
names(results) <- names(module_genes_list_early)
all_ids <- unlist(lapply(results, function(module_results) {
module_results$ID[module_results$es >= 3]
}))
unique_ids <- unique(all_ids)
print(unique_ids)
## [1] "C.GO.0015934.C.large.ribosomal.subunit"
## [2] "C.GO.0005576.C.extracellular.region"
## [3] "F.GO.0003735.F.structural.constituent.of.ribosome"
## [4] "C.GO.0005840.C.ribosome"
## [5] "P.GO.0006412.P.translation"
## [6] "F.GO.0004252.F.serine.type.endopeptidase.activity"
## [7] "F.GO.0004553.F.hydrolase.activity..hydrolyzing.O.glycosyl.compounds"
## [8] "F.GO.0140359.F.ABC.type.transporter.activity"
## [9] "P.GO.0055085.P.transmembrane.transport"
## [10] "C.GO.0005852.C.eukaryotic.translation.initiation.factor.3.complex"
## [11] "P.GO.0006364.P.rRNA.processing"
## [12] "C.GO.0005730.C.nucleolus"
## [13] "P.GO.0001522.P.pseudouridine.synthesis"
## [14] "P.GO.0042254.P.ribosome.biogenesis"
## [15] "F.GO.0051082.F.unfolded.protein.binding"
## [16] "F.GO.0003746.F.translation.elongation.factor.activity"
## [17] "F.GO.0003743.F.translation.initiation.factor.activity"
## [18] "F.GO.0009982.F.pseudouridine.synthase.activity"
## [19] "P.GO.0006414.P.translational.elongation"
## [20] "F.GO.0051539.F.4.iron..4.sulfur.cluster.binding"
## [21] "P.GO.0006457.P.protein.folding"
## [22] "F.GO.0003899.F.DNA.directed.5..3..RNA.polymerase.activity"
## [23] "P.GO.0006413.P.translational.initiation"
## [24] "C.GO.0015935.C.small.ribosomal.subunit"
## [25] "F.GO.0017056.F.structural.constituent.of.nuclear.pore"
# Get all unique IDs across all color modules
unique_ids <- unique(unlist(lapply(results, function(x) x$ID)))
# Create empty matrix to hold values for heatmap
heatmap_matrix <- matrix(0, nrow = length(unique_ids), ncol = length(module_list),
dimnames = list(unique_ids, module_list))
# Fill in matrix with es values where available
for (module_color in names(results)) {
module_results <- results[[module_color]]
module_genes <- module_results$ID
module_es <- module_results$es
for (i in seq_along(unique_ids)) {
if (unique_ids[i] %in% module_genes) {
heatmap_matrix[i, module_color] <- module_es[which(module_genes == unique_ids[i])]
}
}
}
eorahm <- pheatmap(heatmap_matrix,
cluster_rows = FALSE,
cluster_cols = FALSE,
color = colorRampPalette(c("white", "green"))(100),
scale = "none",
fontsize_col = 8,
fontsize_row = 6,
main = "Early Module Enrichment Score",
filename = "pheatmap1.png")
get_module_genes <- function(eigengene_df) {
module_list <- c("red", "greenyellow", "purple", "lightcyan", "blue", "turquoise",
"green", "brown", "yellow", "pink", "tan", "magenta", "midnightblue",
"salmon", "black", "cyan")
gene_list <- list()
module_genes <- lapply(module_list, function(module) {
eigengene_df$gene_id[eigengene_df$V1 > 0.2 & eigengene_df[[paste0("kME", module)]] > 0.7]
})
names(module_genes) <- module_list
return(module_genes)
}
module_genes_list_early <- get_module_genes(earlyeiginengene)
module_genes_list_mid <- get_module_genes(mideiginengene)
module_genes_list_late <- get_module_genes(lateeiginengene)
get_module_genes_all <- function(eigengene_df, module_list) {
gene_list <- list()
module_genes <- lapply(module_list, function(module) {
eigengene_df$gene_id[eigengene_df$V1 > 0.2 & eigengene_df[[paste0("kME", module)]] > 0.7]
})
names(module_genes) <- module_list
return(module_genes)
}
module_list <- c("red", "greenyellow", "purple", "lightcyan", "blue", "turquoise",
"green", "brown", "yellow", "pink", "tan", "magenta", "midnightblue",
"salmon", "black", "cyan")
module_genes_list_mid <- get_module_genes_all(mideiginengene, module_list)
results <- lapply(names(module_genes_list_mid), function(module.color) {
ora_res <- as.data.frame(enricher(gene = module_genes_list_mid[[module.color]],
universe = bg,
maxGSSize = 5000,
TERM2GENE = genesets,
pAdjustMethod = "fdr",
pvalueCutoff = 1,
qvalueCutoff = 1))
ora_res$geneID <- NULL
ora_res <- subset(ora_res, (ora_res$p.adjust < 0.05 & ora_res$Count >= 5))
ora_res_names <- rownames(ora_res)
ora_res$GeneRatio <- as.character(ora_res$GeneRatio)
gr <- as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 2))
br <- as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 2))
ora_res$es <- gr/br
ora_res <- ora_res[order(-ora_res$es), ]
ora_res$Description <- NULL
return(ora_res)
})
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_00833,IUM83_08971,IUM83_01656,IUM83_00469,IUM83_03578,IUM83_01546
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_11128,IUM83_11613,IUM83_03578,IUM83_12735,IUM83_16051,IUM83_04464
## --> return NULL...
## --> No gene can be mapped....
## --> Expected input gene ID: IUM83_19790,IUM83_19107,IUM83_00604,IUM83_09870,IUM83_03576,IUM83_18607
## --> return NULL...
names(results) <- names(module_genes_list_mid)
all_ids <- unlist(lapply(results, function(module_results) {
module_results$ID[module_results$es >= 2]
}))
unique_ids <- unique(all_ids)
print(unique_ids)
## [1] "P.GO.0006096.P.glycolytic.process"
## [2] "F.GO.0010181.F.FMN.binding"
## [3] "C.GO.0005576.C.extracellular.region"
## [4] "P.GO.0006749.P.glutathione.metabolic.process"
## [5] "F.GO.0016620.F.oxidoreductase.activity..acting.on.the.aldehyde.or.oxo.group.of.donors..NAD.or.NADP.as.acceptor"
## [6] "F.GO.0004497.F.monooxygenase.activity"
## [7] "F.GO.0140359.F.ABC.type.transporter.activity"
## [8] "F.GO.0004553.F.hydrolase.activity..hydrolyzing.O.glycosyl.compounds"
## [9] "F.GO.0051287.F.NAD.binding"
## [10] "F.GO.0004252.F.serine.type.endopeptidase.activity"
## [11] "F.GO.0030170.F.pyridoxal.phosphate.binding"
## [12] "F.GO.0016491.F.oxidoreductase.activity"
## [13] "P.GO.0005975.P.carbohydrate.metabolic.process"
## [14] "F.GO.0022857.F.transmembrane.transporter.activity"
## [15] "P.GO.0055085.P.transmembrane.transport"
## [16] "P.GO.0009058.P.biosynthetic.process"
## [17] "P.GO.0006511.P.ubiquitin.dependent.protein.catabolic.process"
## [18] "C.GO.0005852.C.eukaryotic.translation.initiation.factor.3.complex"
## [19] "F.GO.0003746.F.translation.elongation.factor.activity"
## [20] "C.GO.0005730.C.nucleolus"
## [21] "F.GO.0051082.F.unfolded.protein.binding"
## [22] "F.GO.0009982.F.pseudouridine.synthase.activity"
## [23] "P.GO.0001522.P.pseudouridine.synthesis"
## [24] "P.GO.0006414.P.translational.elongation"
## [25] "F.GO.0051539.F.4.iron..4.sulfur.cluster.binding"
## [26] "F.GO.0003743.F.translation.initiation.factor.activity"
## [27] "P.GO.0006457.P.protein.folding"
## [28] "P.GO.0006364.P.rRNA.processing"
## [29] "P.GO.0042254.P.ribosome.biogenesis"
## [30] "P.GO.0006413.P.translational.initiation"
## [31] "F.GO.0003724.F.RNA.helicase.activity"
## [32] "C.GO.0005739.C.mitochondrion"
## [33] "F.GO.0050661.F.NADP.binding"
## [34] "F.GO.0016887.F.ATP.hydrolysis.activity"
## [35] "C.GO.0005743.C.mitochondrial.inner.membrane"
# Get all unique IDs across all color modules
unique_ids <- unique(unlist(lapply(results, function(x) x$ID)))
# Create empty matrix to hold values for heatmap
heatmap_matrix <- matrix(0, nrow = length(unique_ids), ncol = length(module_list),
dimnames = list(unique_ids, module_list))
# Fill in matrix with es values where available
for (module_color in names(results)) {
module_results <- results[[module_color]]
module_genes <- module_results$ID
module_es <- module_results$es
for (i in seq_along(unique_ids)) {
if (unique_ids[i] %in% module_genes) {
heatmap_matrix[i, module_color] <- module_es[which(module_genes == unique_ids[i])]
}
}
}
morahm <-pheatmap(heatmap_matrix,
cluster_rows = FALSE,
cluster_cols = FALSE,
color = colorRampPalette(c("white", "blue"))(100),
scale = "none",
fontsize_col = 8,
fontsize_row = 6,
main = "Mid Module Enrichment Score",
filename = "pheatmap2.png")
get_module_genes <- function(eigengene_df) {
module_list <- c("red", "greenyellow", "purple", "lightcyan", "blue", "turquoise",
"green", "brown", "yellow", "pink", "tan", "magenta", "midnightblue",
"salmon", "black", "cyan")
gene_list <- list()
module_genes <- lapply(module_list, function(module) {
eigengene_df$gene_id[eigengene_df$V1 > 0.2 & eigengene_df[[paste0("kME", module)]] > 0.7]
})
names(module_genes) <- module_list
return(module_genes)
}
module_genes_list_early <- get_module_genes(earlyeiginengene)
module_genes_list_mid <- get_module_genes(mideiginengene)
module_genes_list_late <- get_module_genes(lateeiginengene)
get_module_genes_all <- function(eigengene_df, module_list) {
gene_list <- list()
module_genes <- lapply(module_list, function(module) {
eigengene_df$gene_id[eigengene_df$V1 > 0.2 & eigengene_df[[paste0("kME", module)]] > 0.7]
})
names(module_genes) <- module_list
return(module_genes)
}
module_list <- c("red", "greenyellow", "purple", "lightcyan", "blue", "turquoise",
"green", "brown", "yellow", "pink", "tan", "magenta", "midnightblue",
"salmon", "black", "cyan")
module_genes_list_late <- get_module_genes_all(lateeiginengene, module_list)
results <- lapply(names(module_genes_list_late), function(module.color) {
ora_res <- as.data.frame(enricher(gene = module_genes_list_late[[module.color]],
universe = bg,
maxGSSize = 5000,
TERM2GENE = genesets,
pAdjustMethod = "fdr",
pvalueCutoff = 1,
qvalueCutoff = 1))
ora_res$geneID <- NULL
ora_res <- subset(ora_res, (ora_res$p.adjust < 0.05 & ora_res$Count >= 5))
ora_res_names <- rownames(ora_res)
ora_res$GeneRatio <- as.character(ora_res$GeneRatio)
gr <- as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 2))
br <- as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 2))
ora_res$es <- gr/br
ora_res <- ora_res[order(-ora_res$es), ]
ora_res$Description <- NULL
return(ora_res)
})
names(results) <- names(module_genes_list_late)
all_ids <- unlist(lapply(results, function(module_results) {
module_results$ID[module_results$es >= 3]
}))
unique_ids <- unique(all_ids)
print(unique_ids)
## [1] "C.GO.0015934.C.large.ribosomal.subunit"
## [2] "P.GO.0006096.P.glycolytic.process"
## [3] "F.GO.0004497.F.monooxygenase.activity"
## [4] "C.GO.0005576.C.extracellular.region"
## [5] "F.GO.0003735.F.structural.constituent.of.ribosome"
## [6] "F.GO.0004553.F.hydrolase.activity..hydrolyzing.O.glycosyl.compounds"
## [7] "F.GO.0140359.F.ABC.type.transporter.activity"
## [8] "F.GO.0030170.F.pyridoxal.phosphate.binding"
## [9] "F.GO.0016491.F.oxidoreductase.activity"
## [10] "F.GO.0051287.F.NAD.binding"
## [11] "C.GO.0005852.C.eukaryotic.translation.initiation.factor.3.complex"
## [12] "F.GO.0003746.F.translation.elongation.factor.activity"
## [13] "C.GO.0005730.C.nucleolus"
## [14] "P.GO.0006414.P.translational.elongation"
## [15] "P.GO.0006364.P.rRNA.processing"
## [16] "P.GO.0042254.P.ribosome.biogenesis"
## [17] "F.GO.0003743.F.translation.initiation.factor.activity"
## [18] "F.GO.0051082.F.unfolded.protein.binding"
## [19] "P.GO.0006457.P.protein.folding"
## [20] "P.GO.0006412.P.translation"
## [21] "C.GO.0005840.C.ribosome"
## [22] "C.GO.0015935.C.small.ribosomal.subunit"
## [23] "P.GO.0016567.P.protein.ubiquitination"
## [24] "P.GO.0071704.P.organic.substance.metabolic.process"
## [25] "F.GO.0004842.F.ubiquitin.protein.transferase.activity"
## [26] "P.GO.0007018.P.microtubule.based.movement"
# Get all unique IDs across all color modules
unique_ids <- unique(unlist(lapply(results, function(x) x$ID)))
# Create empty matrix to hold values for heatmap
heatmap_matrix <- matrix(0, nrow = length(unique_ids), ncol = length(module_list),
dimnames = list(unique_ids, module_list))
# Fill in matrix with es values where available
for (module_color in names(results)) {
module_results <- results[[module_color]]
module_genes <- module_results$ID
module_es <- module_results$es
for (i in seq_along(unique_ids)) {
if (unique_ids[i] %in% module_genes) {
heatmap_matrix[i, module_color] <- module_es[which(module_genes == unique_ids[i])]
}
}
}
late <-pheatmap(heatmap_matrix,
cluster_rows = FALSE,
cluster_cols = FALSE,
color = colorRampPalette(c("white", "red"))(100),
scale = "none",
fontsize_col = 8,
fontsize_row = 6,
main = "Late Module Enrichment Score",
filename = "pheatmap3.png")
hml <- as.data.frame.matrix(heatmap_matrix)
module_genes_list_late <- get_module_genes_all(lateeiginengene, module_list)
results <- lapply(names(module_genes_list_late), function(module.color) {
ora_res <- as.data.frame(enricher(gene = module_genes_list_late[[module.color]],
universe = bg,
maxGSSize = 5000,
TERM2GENE = genesets,
pAdjustMethod = "fdr",
pvalueCutoff = 1,
qvalueCutoff = 1))
ora_res$geneID <- NULL
ora_res <- subset(ora_res, (ora_res$p.adjust < 0.05 & ora_res$Count >= 5))
ora_res_names <- rownames(ora_res)
ora_res$GeneRatio <- as.character(ora_res$GeneRatio)
gr <- as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$GeneRatio), "/"), "[[", 2))
br <- as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 1)) /
as.numeric(sapply(strsplit(as.character(ora_res$BgRatio), "/"), "[[", 2))
ora_res$es <- gr/br
ora_res <- ora_res[order(-ora_res$es), ]
ora_res$Description <- NULL
return(ora_res)
})
names(results) <- names(module_genes_list_late)
all_ids <- unlist(lapply(results, function(module_results) {
module_results$ID[module_results$es >= 3]
}))
unique_ids <- unique(all_ids)
print(unique_ids)
## [1] "C.GO.0015934.C.large.ribosomal.subunit"
## [2] "P.GO.0006096.P.glycolytic.process"
## [3] "F.GO.0004497.F.monooxygenase.activity"
## [4] "C.GO.0005576.C.extracellular.region"
## [5] "F.GO.0003735.F.structural.constituent.of.ribosome"
## [6] "F.GO.0004553.F.hydrolase.activity..hydrolyzing.O.glycosyl.compounds"
## [7] "F.GO.0140359.F.ABC.type.transporter.activity"
## [8] "F.GO.0030170.F.pyridoxal.phosphate.binding"
## [9] "F.GO.0016491.F.oxidoreductase.activity"
## [10] "F.GO.0051287.F.NAD.binding"
## [11] "C.GO.0005852.C.eukaryotic.translation.initiation.factor.3.complex"
## [12] "F.GO.0003746.F.translation.elongation.factor.activity"
## [13] "C.GO.0005730.C.nucleolus"
## [14] "P.GO.0006414.P.translational.elongation"
## [15] "P.GO.0006364.P.rRNA.processing"
## [16] "P.GO.0042254.P.ribosome.biogenesis"
## [17] "F.GO.0003743.F.translation.initiation.factor.activity"
## [18] "F.GO.0051082.F.unfolded.protein.binding"
## [19] "P.GO.0006457.P.protein.folding"
## [20] "P.GO.0006412.P.translation"
## [21] "C.GO.0005840.C.ribosome"
## [22] "C.GO.0015935.C.small.ribosomal.subunit"
## [23] "P.GO.0016567.P.protein.ubiquitination"
## [24] "P.GO.0071704.P.organic.substance.metabolic.process"
## [25] "F.GO.0004842.F.ubiquitin.protein.transferase.activity"
## [26] "P.GO.0007018.P.microtubule.based.movement"
# Get all unique IDs across all color modules
unique_ids <- unique(unlist(lapply(results, function(x) x$ID)))
# Create empty matrix to hold values for heatmap
heatmap_matrix <- matrix(0, nrow = length(unique_ids), ncol = length(module_list),
dimnames = list(unique_ids, module_list))
# Fill in matrix with es values where available
for (module_color in names(results)) {
module_results <- results[[module_color]]
module_genes <- module_results$ID
module_es <- module_results$es
for (i in seq_along(unique_ids)) {
if (unique_ids[i] %in% module_genes) {
heatmap_matrix[i, module_color] <- module_es[which(module_genes == unique_ids[i])]
}
}
}
late <- pheatmap(heatmap_matrix,
cluster_rows = FALSE,
cluster_cols = FALSE,
color = colorRampPalette(c("white", "red"))(100),
scale = "none",
fontsize_col = 8,
fontsize_row = 6,
main = "Late Module Enrichment Score",
filename = "pheatmap4.png")
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_AU.UTF-8 LC_COLLATE=en_AU.UTF-8
## [5] LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
## [7] LC_PAPER=en_AU.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Australia/Melbourne
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 pheatmap_1.0.12
## [3] clusterProfiler_4.8.1 edgeR_3.42.2
## [5] limma_3.56.0 rtracklayer_1.60.0
## [7] reshape2_1.4.4 gridExtra_2.3
## [9] lubridate_1.9.2 forcats_1.0.0
## [11] stringr_1.5.0 dplyr_1.1.2
## [13] purrr_1.0.1 readr_2.1.4
## [15] tidyr_1.3.0 tibble_3.2.1
## [17] ggplot2_3.4.2 tidyverse_2.0.0
## [19] GEOquery_2.68.0 DESeq2_1.40.1
## [21] SummarizedExperiment_1.30.1 Biobase_2.60.0
## [23] MatrixGenerics_1.12.0 matrixStats_0.63.0
## [25] GenomicRanges_1.52.0 GenomeInfoDb_1.36.0
## [27] IRanges_2.34.0 S4Vectors_0.38.1
## [29] BiocGenerics_0.46.0 WGCNA_1.72-1
## [31] fastcluster_1.2.3 dynamicTreeCut_1.63-1
## [33] kableExtra_1.3.4
##
## loaded via a namespace (and not attached):
## [1] splines_4.3.0 BiocIO_1.10.0 bitops_1.0-7
## [4] ggplotify_0.1.0 polyclip_1.10-4 preprocessCore_1.62.1
## [7] XML_3.99-0.14 rpart_4.1.19 lifecycle_1.0.3
## [10] doParallel_1.0.17 lattice_0.21-8 MASS_7.3-60
## [13] backports_1.4.1 magrittr_2.0.3 Hmisc_5.0-1
## [16] sass_0.4.5 rmarkdown_2.21 jquerylib_0.1.4
## [19] yaml_2.3.7 cowplot_1.1.1 DBI_1.1.3
## [22] zlibbioc_1.46.0 rvest_1.0.3 ggraph_2.1.0
## [25] RCurl_1.98-1.12 yulab.utils_0.0.6 nnet_7.3-18
## [28] tweenr_2.0.2 GenomeInfoDbData_1.2.10 enrichplot_1.20.0
## [31] ggrepel_0.9.3 tidytree_0.4.2 svglite_2.1.1
## [34] codetools_0.2-19 DelayedArray_0.26.1 DOSE_3.26.1
## [37] xml2_1.3.4 ggforce_0.4.1 tidyselect_1.2.0
## [40] aplot_0.1.10 farver_2.1.1 viridis_0.6.2
## [43] base64enc_0.1-3 webshot_0.5.4 GenomicAlignments_1.36.0
## [46] jsonlite_1.8.4 tidygraph_1.2.3 Formula_1.2-5
## [49] survival_3.5-5 iterators_1.0.14 systemfonts_1.0.4
## [52] foreach_1.5.2 tools_4.3.0 treeio_1.24.0
## [55] Rcpp_1.0.10 glue_1.6.2 xfun_0.39
## [58] qvalue_2.32.0 withr_2.5.0 fastmap_1.1.1
## [61] fansi_1.0.4 digest_0.6.31 timechange_0.2.0
## [64] R6_2.5.1 gridGraphics_0.5-1 colorspace_2.1-0
## [67] GO.db_3.17.0 RSQLite_2.3.1 utf8_1.2.3
## [70] generics_0.1.3 data.table_1.14.8 graphlayouts_0.8.4
## [73] httr_1.4.5 htmlwidgets_1.6.2 S4Arrays_1.0.1
## [76] scatterpie_0.1.9 pkgconfig_2.0.3 gtable_0.3.3
## [79] blob_1.2.4 impute_1.74.1 XVector_0.40.0
## [82] shadowtext_0.1.2 htmltools_0.5.5 fgsea_1.26.0
## [85] scales_1.2.1 png_0.1-8 ggfun_0.0.9
## [88] knitr_1.42 rstudioapi_0.14 tzdb_0.3.0
## [91] rjson_0.2.21 nlme_3.1-162 checkmate_2.2.0
## [94] cachem_1.0.7 parallel_4.3.0 HDO.db_0.99.1
## [97] foreign_0.8-84 AnnotationDbi_1.62.1 restfulr_0.0.15
## [100] pillar_1.9.0 grid_4.3.0 vctrs_0.6.2
## [103] cluster_2.1.4 htmlTable_2.4.1 evaluate_0.20
## [106] cli_3.6.1 locfit_1.5-9.7 compiler_4.3.0
## [109] Rsamtools_2.16.0 rlang_1.1.1 crayon_1.5.2
## [112] labeling_0.4.2 plyr_1.8.8 stringi_1.7.12
## [115] viridisLite_0.4.1 BiocParallel_1.34.0 munsell_0.5.0
## [118] Biostrings_2.68.0 lazyeval_0.2.2 GOSemSim_2.26.0
## [121] Matrix_1.5-4 patchwork_1.1.2 hms_1.1.3
## [124] bit64_4.0.5 KEGGREST_1.40.0 highr_0.10
## [127] igraph_1.4.2 memoise_2.0.1 bslib_0.4.2
## [130] ggtree_3.8.0 fastmatch_1.1-3 bit_4.0.5
## [133] downloader_0.4 gson_0.1.0 ape_5.7-1